#------------------------------------------------------------------------------*
# Backsliding by Surprise: The Rise of Chavismo		  						               *
# Dorothy Kronick, Barry Plunkett, Pedro Rodríguez	                           *
# Corresponding author: Dorothy Kronick (kronick@berkeley.edu)                 *
# -----------------------------------------------------------------------------*

# Location of the master Replication folder. Every path used below
# ("code_R/...", "figures/...", "data/outputs/...") is relative to it.
# The original author's path is kept as the default. To run the replication on
# another machine, either edit the default below or set the REPLICATION_DIR
# environment variable before launching R.
replication_dir <- Sys.getenv("REPLICATION_DIR", unset = "")
if (!nzchar(replication_dir)) {
  replication_dir <- "/Users/prodriguezsosa/Dropbox/Research/Replication"
}
# Fail early with an actionable message instead of setwd()'s generic error.
if (!dir.exists(replication_dir)) {
  stop(
    "Replication folder not found: ", replication_dir,
    "\nEdit the default path in this script or set REPLICATION_DIR.",
    call. = FALSE
  )
}
setwd(replication_dir)

# ----------------------------
# table 1
# ----------------------------

# pre-process data (remove accents in Spanish)
#source("code_R/granier_data.R")

# run dictionary expansion
source("code_R/dictionary_expansion.R")

# create table
source("code_R/tb1.R")
# Print explicitly: a bare `dict_counts` is only auto-printed when this file is
# run line by line at top level. When the file is itself executed through
# source() with the default echo = FALSE, nothing would be shown.
# NOTE(review): dict_counts is presumably created by code_R/tb1.R -- confirm.
print(dict_counts)

# ----------------------------
# figure A5 and A6
# ----------------------------
# figures A5 and A6 use the outputs from our topic model estimation. In order to select the optimal 
# number of topics k, we ran 57 topic models, each with a different number of topics --starting with 
# a minimum of k = 25 topics, increasing (in increments of 5) to 305. To run this number of topic models 
# we used NYU's high performance cluster (HPC). The scripts include:
#
# pre-processing scripts (these scripts take 1hr + to run)
#source("code_R/preProcessing.R") 
#source("code_R/collocations.R") # adds collocations

# the following scripts were used to run the topic models on NYU's HPC
# i. topic_model.R # the main script
# ii. topic_model.txt # the hpc script
# iii. helper.sh # a helper file to loop over different numbers of topics

# pairwise information divergence to select optimal number of topics
#source("code_R/optimal_k.R")

# build figures A5 and A6 by running their scripts in order (A5 first, then A6)
for (figure_script in c("code_R/fgA5.R", "code_R/fgA6.R")) {
  source(figure_script)
}

# ----------------------------
# delete figures
# we do not delete pre-processed text data
# nor topic model outputs, given the time
# and compute resources they require
# ----------------------------
# remove the generated figure PDFs and spreadsheet outputs in a single
# vectorized call (paths are relative to the working directory)
unlink(c(
  "figures/fgA5a.pdf",
  "figures/fgA5b.pdf",
  "figures/fgA6a.pdf",
  "figures/fgA6b.pdf",
  "data/outputs/nns_constituyente_all.xlsx",
  "data/outputs/nns_pobreza_all.xlsx"
))
#unlink("data/outputs/granier_data.rds")
#unlink("data/outputs/stopwords_es.rds")
#unlink("data/outputs/chavez_discourse_preprocessed.rds")
#unlink("data/outputs/chavez_discourse_preprocessed_colloc.rds")

